from langchain.chat_models import init_chat_model
from langchain_core.globals import set_llm_cache
from langchain_core.caches import InMemoryCache
import os
import time

# Read the API key from the environment; fail fast if it's missing.
# (Previously `str(key)` silently converted a missing key (None) into the
# literal string "None", which would be sent to the API and fail with a
# confusing authentication error far from the real cause.)
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")

# OpenAI-compatible chat model served through a custom gateway.
llm = init_chat_model(
    model="gpt-4o-mini",
    model_provider="openai",
    base_url="https://api.zetatechs.com/v1",
    api_key=api_key,
)

# Install an in-memory LLM cache BEFORE starting the timer, so the
# measurement covers only the model call, not cache construction.
set_llm_cache(InMemoryCache())

# First call: prompt is not yet cached, so this hits the API and is slow.
# perf_counter() is a monotonic high-resolution clock, the correct tool
# for measuring elapsed intervals (time.time() is wall-clock and can jump).
start_time = time.perf_counter()
print(llm.invoke("Tell me a joke"))
elapsed = time.perf_counter() - start_time
print(f"First time: {elapsed} seconds")

# Second call with the identical prompt: served from the in-memory cache,
# so it should return almost instantly — this is what the demo is about.
start_time = time.perf_counter()
print(llm.invoke("Tell me a joke"))
elapsed = time.perf_counter() - start_time
print(f"Second time (cached): {elapsed} seconds")